package com.boot.pachong.task;

import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.*;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.*;
import java.util.stream.Collectors;


/**
 * WebMagic {@link PageProcessor} that walks the video index at
 * {@code http://www.yongjiuzy1.com/} and records what it finds in Redis sorted sets.
 *
 * <ul>
 *   <li>Home page and list pages ({@code vod-index-pg-} URLs): the links returned by
 *       {@link #getPages(Page)} and {@link #getDetails(Page)} are queued for crawling and
 *       stored in the {@code zpages} / {@code zdetails} sorted sets.</li>
 *   <li>Detail pages ({@code vod-detail-id-} URLs): the scraped fields are collected into a
 *       map and stored in the {@code zdetail} sorted set, scored by the numeric id taken
 *       from the URL.</li>
 * </ul>
 *
 * Any other URL is ignored.
 */
@Slf4j
public class YongJiuProcessor implements PageProcessor {

    /** Exact URL of the home page; it is processed the same way as a list page. */
    private static final String HOME_URL = "http://www.yongjiuzy1.com/";

    /** URL fragment identifying a list page. */
    private static final String LIST_MARKER = "vod-index-pg-";

    /** URL fragment identifying a detail page. */
    private static final String DETAIL_MARKER = "vod-detail-id-";

    /**
     * Index of the dash-separated URL segment that carries the numeric id, e.g.
     * {@code http://host/?m=vod-detail-id-99326.html} splits into
     * {@code [..vod, detail, id, 99326.html]}.
     */
    private static final int ID_SEGMENT_INDEX = 3;

    /** XPath of the element holding the textual info rows of a detail page. */
    private static final String INFO_XPATH = "/html/body/div[4]/div[1]/div/div[2]";

    /** XPath of the list holding the play-source entries of a detail page. */
    private static final String PLAY_LIST_XPATH = "/html/body/div[4]/div[2]/div/div/ul";

    RedisTemplate<String, Object> redisTemplate;

    /**
     * @param redisTemplate template used for every sorted-set write made by this processor
     */
    public YongJiuProcessor(RedisTemplate<String, Object> redisTemplate) {
        this.redisTemplate = redisTemplate;
    }

    /**
     * Dispatches on the page URL: home/list pages enqueue further links, detail pages are
     * scraped and persisted, anything else is left untouched.
     *
     * @param page the downloaded page handed over by WebMagic
     */
    @Override
    public void process(Page page) {
        String url = page.getUrl().get();
        if (url == null) {
            // Nothing to classify; previously this ended in a NullPointerException.
            return;
        }
        if (HOME_URL.equals(url) || url.contains(LIST_MARKER)) {
            page.addTargetRequests(getPages(page));
            page.addTargetRequests(getDetails(page));
        } else if (url.contains(DETAIL_MARKER)) {
            saveDetail(page, url);
        }
    }

    /**
     * Collects every link matched by {@code /html/body/div[4]/div[1]/div/a} (presumably the
     * pager of a list page - confirm against the site markup) and stores those with a
     * parseable numeric id segment in the {@code zpages} sorted set, scored by that number.
     *
     * @param page a home or list page
     * @return all matched links, including ones that could not be scored
     */
    public List<String> getPages(Page page) {
        List<String> pages = page.getHtml().xpath("/html/body/div[4]/div[1]/div/a").links().all();
        storeScoredLinks("zpages", pages);
        return pages;
    }

    /**
     * Collects the links inside the {@code data_list} table that point at detail pages and
     * stores those with a parseable numeric id in the {@code zdetails} sorted set, scored by
     * that id.
     *
     * @param page a home or list page
     * @return the links containing {@code vod-detail-id-}
     */
    public List<String> getDetails(Page page) {
        List<String> all = page.getHtml().xpath("//*[@id=\"data_list\"]/tr/td/a").links().all();
        List<String> details = all.stream()
                .filter(link -> link.contains(DETAIL_MARKER))
                .collect(Collectors.toList());
        storeScoredLinks("zdetails", details);
        return details;
    }

    /**
     * Builds the crawler configuration: a 2000 ms sleep time and a desktop Chrome user agent.
     *
     * @return a freshly created {@link Site}
     */
    @Override
    public Site getSite() {
        return Site
                .me()
                .setSleepTime(2000)
                .setUserAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
    }

    /**
     * Scrapes one detail page into a map and adds it to the {@code zdetail} sorted set, scored
     * by the numeric id from the URL. Pages whose id cannot be parsed are skipped with a warning.
     */
    private void saveDetail(Page page, String url) {
        // Same derivation getDetails uses for its scores, so both sorted sets agree on the id.
        String vodId = idSegment(url);
        Double score = toScore(vodId);
        if (score == null) {
            log.warn("Skipping detail page, no numeric id found in url={}", url);
            return;
        }

        /*
         * NOTE(review): the original author kept a sample JSON record here as a reference
         * (apparently from an upstream feed - confirm). Its keys were: vod_id, vod_cid,
         * vod_name, vod_title, vod_type, vod_keywords, vod_actor, vod_director, vod_content,
         * vod_pic, vod_area, vod_language, vod_year, vod_addtime, vod_filmtime, vod_server,
         * vod_play, vod_url, vod_inputer, vod_reurl, vod_length, vod_weekday, vod_copyright,
         * vod_state, vod_version, vod_tv, vod_total, vod_continu, vod_status, vod_stars,
         * vod_hits, vod_isend, vod_douban_id, vod_series, list_name.
         * The map built below only partly follows that naming.
         */
        Map<String, Object> map = new HashMap<>();
        map.put("vod_id", vodId);
        map.put("page_url", url);
        map.put("vod_name", text(page, INFO_XPATH + "/li[1]/text()"));
        map.put("vod_alias_name", text(page, INFO_XPATH + "/li[2]/text()"));
        map.put("vod_marker", text(page, INFO_XPATH + "/li[3]/text()"));
        map.put("vod_actor", text(page, INFO_XPATH + "/li[4]/text()"));
        map.put("vod_director", text(page, INFO_XPATH + "/li[5]/text()"));
        map.put("classification", text(page, INFO_XPATH + "/li[6]/div[1]/text()"));
        map.put("vod_type", text(page, INFO_XPATH + "/li[6]/div[2]/text()"));
        map.put("vod_lange", text(page, INFO_XPATH + "/li[7]/div[1]/text()"));
        map.put("vod_area", text(page, INFO_XPATH + "/li[7]/div[2]/text()"));
        map.put("vod_status", text(page, INFO_XPATH + "/li[8]/div[1]/text()"));
        map.put("vod_year", text(page, INFO_XPATH + "/li[8]/div[2]/text()"));
        map.put("vod_add_time", text(page, INFO_XPATH + "/li[9]/div[1]/text()"));
        map.put("vod_douban", text(page, INFO_XPATH + "/li[9]/div[2]/text()"));
        map.put("vod_img", text(page, "/html/body/div[4]/div[1]/div/div[1]/img/@src"));

        Map<String, String> urlyun = new HashMap<>();
        Map<String, String> urlm3u8 = new HashMap<>();
        collectPlaySources(page, urlyun, urlm3u8);
        map.put("vod_play_list_yun", urlyun);
        map.put("vod_play_list_m3u8", urlm3u8);

        // Single-member overload; replaces an anonymous HashSet subclass built per page.
        redisTemplate.opsForZSet().add("zdetail", map, score);

        log.info("url={}已保存到redis", url);
    }

    /**
     * Walks the play-source list items by 1-based position until one is missing. Only items
     * containing an {@code input} element and a non-null link text are recorded, keyed by
     * the text of the anchor's {@code span}: link texts containing {@code .m3u8} go into
     * {@code m3u8Sources}, all others into {@code otherSources}.
     */
    private static void collectPlaySources(Page page,
                                           Map<String, String> otherSources,
                                           Map<String, String> m3u8Sources) {
        for (int index = 1; ; index++) {
            String item = PLAY_LIST_XPATH + "/li[" + index + "]";
            if (text(page, item) == null) {
                break;
            }
            if (text(page, item + "/input") == null) {
                continue;
            }
            String source = text(page, item + "/a/text()");
            if (source == null) {
                continue;
            }
            String name = text(page, item + "/a/span/text()");
            if (source.contains(".m3u8")) {
                m3u8Sources.put(name, source);
            } else {
                otherSources.put(name, source);
            }
        }
    }

    /**
     * Adds each link with a parseable numeric id segment to the sorted set {@code key}, using
     * that number as the score. Links that cannot be scored are skipped.
     */
    private void storeScoredLinks(String key, List<String> links) {
        Set<ZSetOperations.TypedTuple<Object>> tuples = new HashSet<>();
        for (String link : links) {
            Double score = toScore(idSegment(link));
            if (score == null) {
                log.debug("Not scoring link without numeric id segment: {}", link);
                continue;
            }
            ZSetOperations.TypedTuple<Object> tuple = new DefaultTypedTuple<>(link, score);
            tuples.add(tuple);
        }
        if (tuples.isEmpty()) {
            // ZADD needs at least one score/member pair, so an empty batch is not sent.
            return;
        }
        redisTemplate.opsForZSet().add(key, tuples);
    }

    /** Evaluates {@code xpath} against the page HTML and returns the first match, or null. */
    private static String text(Page page, String xpath) {
        return page.getHtml().xpath(xpath).get();
    }

    /**
     * Returns the fourth dash-separated segment of {@code link} with its extension (everything
     * from the last {@code '.'}) removed, or null when the link has fewer segments or the
     * segment has no extension.
     */
    private static String idSegment(String link) {
        if (link == null) {
            return null;
        }
        String[] parts = link.split("-");
        if (parts.length <= ID_SEGMENT_INDEX) {
            return null;
        }
        String segment = parts[ID_SEGMENT_INDEX];
        int dot = segment.lastIndexOf('.');
        return dot < 0 ? null : segment.substring(0, dot);
    }

    /** Parses {@code id} as a sorted-set score; returns null for null or non-numeric input. */
    private static Double toScore(String id) {
        if (id == null) {
            return null;
        }
        try {
            return Double.valueOf(id);
        } catch (NumberFormatException e) {
            return null;
        }
    }

}
